The Covid-19 pandemic has shook the entire world since it's onset from December 2019. Covid-19 has spread across the globe in less than 4 months and it became very important for WHO to study the growth of virus in different regions. Every region had it's peak at different time and to study the past peaks many have predicted any future wave. Also the fatality rates will help in proper distribution of vaccine.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly
plotly.offline.init_notebook_mode()
# Load the WHO summary table (cumulative and recent case/death counts per
# row of the CSV) and display it as the cell output.
summary_csv = "./data/WHO-COVID-19-global-table-data.csv"
df = pd.read_csv(summary_csv)
df
| Name | WHO Region | Cases - cumulative total | Cases - cumulative total per 100000 population | Cases - newly reported in last 7 days | Cases - newly reported in last 7 days per 100000 population | Cases - newly reported in last 24 hours | Deaths - cumulative total | Deaths - cumulative total per 100000 population | Deaths - newly reported in last 7 days | Deaths - newly reported in last 7 days per 100000 population | Deaths - newly reported in last 24 hours | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Global | abs | 493392853 | 6329.980092 | 7968332 | 102.229659 | 1157995 | 6165833 | 79.104511 | 23607 | 0.302866 | 3930 |
| 1 | United States of America | Americas | 79501007 | 24018.239000 | 207355 | 62.645000 | 35049 | 975540 | 294.723000 | 3657 | 1.105000 | 610 |
| 2 | India | South-East Asia | 43031958 | 3118.248000 | 7518 | 0.545000 | 1033 | 521530 | 37.792000 | 401 | 0.029000 | 43 |
| 3 | Brazil | Americas | 30040129 | 14132.580000 | 157732 | 74.206000 | 27331 | 660528 | 310.750000 | 1287 | 0.605000 | 216 |
| 4 | France | Europe | 25623682 | 39397.193000 | 926709 | 1424.843000 | 161950 | 139706 | 214.802000 | 755 | 1.161000 | 128 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 233 | Pitcairn Islands | Western Pacific | 0 | 0.000000 | 0 | 0.000000 | 0 | 0 | 0.000000 | 0 | 0.000000 | 0 |
| 234 | Saint Helena | Africa | 0 | 0.000000 | 0 | 0.000000 | 0 | 0 | 0.000000 | 0 | 0.000000 | 0 |
| 235 | Tokelau | Western Pacific | 0 | 0.000000 | 0 | 0.000000 | 0 | 0 | 0.000000 | 0 | 0.000000 | 0 |
| 236 | Turkmenistan | Europe | 0 | 0.000000 | 0 | 0.000000 | 0 | 0 | 0.000000 | 0 | 0.000000 | 0 |
| 237 | Tuvalu | Western Pacific | 0 | 0.000000 | 0 | 0.000000 | 0 | 0 | 0.000000 | 0 | 0.000000 | 0 |
238 rows × 12 columns
# The per-100,000-population columns are not used below; only the absolute
# counts are kept in `covid_df`.
Col = [
    "Cases - cumulative total per 100000 population",
    "Cases - newly reported in last 7 days per 100000 population",
    "Deaths - cumulative total per 100000 population",
    "Deaths - newly reported in last 7 days per 100000 population",
]
covid_df = df.drop(columns=Col)
covid_df
| Name | WHO Region | Cases - cumulative total | Cases - newly reported in last 7 days | Cases - newly reported in last 24 hours | Deaths - cumulative total | Deaths - newly reported in last 7 days | Deaths - newly reported in last 24 hours | |
|---|---|---|---|---|---|---|---|---|
| 0 | Global | abs | 493392853 | 7968332 | 1157995 | 6165833 | 23607 | 3930 |
| 1 | United States of America | Americas | 79501007 | 207355 | 35049 | 975540 | 3657 | 610 |
| 2 | India | South-East Asia | 43031958 | 7518 | 1033 | 521530 | 401 | 43 |
| 3 | Brazil | Americas | 30040129 | 157732 | 27331 | 660528 | 1287 | 216 |
| 4 | France | Europe | 25623682 | 926709 | 161950 | 139706 | 755 | 128 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 233 | Pitcairn Islands | Western Pacific | 0 | 0 | 0 | 0 | 0 | 0 |
| 234 | Saint Helena | Africa | 0 | 0 | 0 | 0 | 0 | 0 |
| 235 | Tokelau | Western Pacific | 0 | 0 | 0 | 0 | 0 | 0 |
| 236 | Turkmenistan | Europe | 0 | 0 | 0 | 0 | 0 | 0 |
| 237 | Tuvalu | Western Pacific | 0 | 0 | 0 | 0 | 0 | 0 |
238 rows × 8 columns
# NOTE: this is a second name for the same DataFrame object, not a copy.
new_df = covid_df

# Distinct "WHO Region" labels in first-appearance order.  dict.fromkeys is
# used instead of set() so the order (and therefore the order of every
# per-region chart built from `listconti`) is the same on every run.
uniqueValsList = list(dict.fromkeys(df["WHO Region"]))

# Labels that are not real WHO regions.  "abs" is the label carried by the
# "Global" aggregate row in this table; "Other" is presumably a non-regional
# bucket -- TODO confirm against the CSV.
non_region_labels = {"abs", "Other"}

# Map each WHO region to the list of names reported under it.  Filtering up
# front (rather than `del` afterwards) avoids a KeyError when one of the
# non-region labels is absent from the data.
Continents = {
    region: [] for region in uniqueValsList if region not in non_region_labels
}
for region, name in zip(df["WHO Region"], df["Name"]):
    if region in Continents:
        Continents[region].append(name)

# Region names and, in the same order, how many entries each region has.
listconti = list(Continents)
Countries = [len(Continents[region]) for region in listconti]
Countries
# Bar chart: number of entries per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(listconti, Countries, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Countries")
plt.title("Number of Countries in each Region")

# Pie chart of the same counts on its own figure.
# One explode offset per slice, so the chart still works if the number of
# regions is not exactly six.
myexplode = [0.1] * len(Countries)
# NOTE: despite its name, `ax` is a Figure (the return value of plt.figure()).
ax = plt.figure()
ax.set_facecolor('blue')
plt.pie(Countries, labels=listconti, startangle=90, explode=myexplode, shadow=True)
# The title must be set after the pie figure is created; setting it before
# plt.figure() attaches it to the previous (bar) figure instead.
plt.title("Number of Countries in each Region")
plt.show()
Total Cumulative cases
# Total cumulative cases per WHO region.
# groupby/sum replaces a nested row-by-region Python loop; reindex keeps the
# keys in `listconti` order (so charts line up with the other per-region
# dicts), drops labels that are not in `listconti`, and gives 0 to any
# region that has no rows.
Cases_per_region = (
    covid_df.groupby("WHO Region")["Cases - cumulative total"]
    .sum()
    .reindex(listconti, fill_value=0)
    .to_dict()
)

# Parallel lists consumed by the plotting cell that follows.
x = list(Cases_per_region)
y = list(Cases_per_region.values())
# Bar chart: cumulative cases per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(x, y, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Cases")
plt.title("Number of Cases in each Region")

# Pie chart of the same totals on its own figure.
# One explode offset per slice instead of a hard-coded list of six.
myexplode = [0.1] * len(y)
# NOTE: despite its name, `ax` is a Figure (the return value of plt.figure()).
ax = plt.figure()
ax.set_facecolor('blue')
plt.pie(y, labels=x, startangle=90, explode=myexplode, shadow=True)
# Title is set after plt.figure() so it lands on the pie, not the bar chart.
plt.title("Number of Cases in each Region")
plt.show()
# Cases newly reported in the last 7 days, summed per WHO region.
# groupby/sum replaces a nested row-by-region Python loop; reindex keeps the
# keys in `listconti` order, drops labels that are not in `listconti`, and
# gives 0 to any region that has no rows.
Cases_per_region_7 = (
    covid_df.groupby("WHO Region")["Cases - newly reported in last 7 days"]
    .sum()
    .reindex(listconti, fill_value=0)
    .to_dict()
)

# Parallel lists consumed by the plotting cell that follows.
x = list(Cases_per_region_7)
y = list(Cases_per_region_7.values())
# Bar chart: cases reported in the last 7 days per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(x, y, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Cases")
plt.title("Number of Cases in each Region in last 7 days")

# Pie chart of the same totals on its own figure.
# One explode offset per slice instead of a hard-coded list of six.
myexplode = [0.1] * len(y)
# NOTE: despite its name, `ax` is a Figure (the return value of plt.figure()).
ax = plt.figure()
ax.set_facecolor('blue')
plt.pie(y, labels=x, startangle=90, explode=myexplode, shadow=True)
# Title is set after plt.figure() so it lands on the pie, not the bar chart.
plt.title("Number of Cases in each Region in last 7 days")
plt.show()
# Cases newly reported in the last 24 hours, summed per WHO region.
# groupby/sum replaces a nested row-by-region Python loop; reindex keeps the
# keys in `listconti` order, drops labels that are not in `listconti`, and
# gives 0 to any region that has no rows.
Case_per_region_24 = (
    covid_df.groupby("WHO Region")["Cases - newly reported in last 24 hours"]
    .sum()
    .reindex(listconti, fill_value=0)
    .to_dict()
)

# Parallel lists consumed by the plotting cell that follows.
x = list(Case_per_region_24)
y = list(Case_per_region_24.values())
# Bar chart: cases reported in the last 24 hours per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(x, y, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Cases")
plt.title("Number of Cases in each Region in last 24 hours")

# Pie chart of the same totals on its own figure.
# One explode offset per slice instead of a hard-coded list of six.
myexplode = [0.1] * len(y)
# NOTE: despite its name, `ax` is a Figure (the return value of plt.figure()).
ax = plt.figure()
ax.set_facecolor('blue')
plt.pie(y, labels=x, startangle=90, explode=myexplode, shadow=True)
# Title is set after plt.figure() so it lands on the pie, not the bar chart.
plt.title("Number of Cases in each Region in last 24 hours")
plt.show()
# Per region: [cases in the last 7 days, cases in the last 24 hours].
T = {
    region: [Cases_per_region_7[region], Case_per_region_24[region]]
    for region in Cases_per_region
}

# Tick labels, evenly spaced x positions (2, 4, 6, ...) and the two series,
# all in the key order of Case_per_region_24.
labels = list(Case_per_region_24.keys())
X = [2 * (slot + 1) for slot in range(len(labels))]
y1 = [Cases_per_region_7[region] for region in labels]
y2 = [Case_per_region_24[region] for region in labels]
cnt = 2 * len(labels)

# Line plot of both series across the regions.
fig = plt.figure(figsize=(20, 5))
plt.title("Trend of Covid-19 Cases")
plt.xlabel("WHO Region")
plt.ylabel("Cases in million")
plt.plot(X, y1, label="Last 7 Days")
plt.plot(X, y2, label="Last 24 Hours")
plt.xticks(X, labels)
plt.legend()
plt.show()
# Share of each region's 7-day case count that was reported in the last
# 24 hours.  A region with no cases in the last 7 days gets NaN instead of
# triggering a division by zero; matplotlib leaves a gap for NaN points.
y3 = [
    last_24h / last_7d if last_7d else float("nan")
    for last_7d, last_24h in zip(y1, y2)
]

fig = plt.figure(figsize=(20, 5))
plt.plot(X, y3, label="Ratio of Case in last 24 hours to last 7 days")
plt.xlabel("WHO Region")
plt.ylabel("Ratio")
plt.xticks(X, labels)
plt.legend()
plt.show()
y3
[0.15027388861779462, 0.16017252249332714, 0.09321024419297201, 0.1372273576070679, 0.144270942744439, 0.14464966780054914]
From this we can say that the rise in case in last 24 hours is very less in Africa while it has been extremely high for the American and European Countries. Now let us move to the deaths due to Covid-19.
# Death totals per WHO region for the three reporting windows.
# A single groupby/sum replaces the nested row-by-region Python loop;
# reindex keeps rows in `listconti` order, drops labels that are not in
# `listconti`, and gives 0 to any region that has no rows.
region_deaths = (
    covid_df.groupby("WHO Region")[
        [
            "Deaths - cumulative total",
            "Deaths - newly reported in last 7 days",
            "Deaths - newly reported in last 24 hours",
        ]
    ]
    .sum()
    .reindex(listconti, fill_value=0)
)

Deaths_perRegion = region_deaths["Deaths - cumulative total"].to_dict()
Deaths_perRegion_7 = region_deaths["Deaths - newly reported in last 7 days"].to_dict()
Deaths_perRegion_24 = region_deaths["Deaths - newly reported in last 24 hours"].to_dict()
x = list(Deaths_perRegion.keys())
y = list(Deaths_perRegion.values())

# Bar chart: cumulative deaths per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(x, y, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Deaths")
plt.title("Number of Deaths in each Region")
plt.show()

# Pie chart of the same totals.  It gets an explicit figure of its own so
# that the background colour is applied to the figure that actually holds
# the pie; setting it on the bar figure after plt.show() has no visible
# effect.
# One explode offset per slice instead of a hard-coded list of six.
myexplode = [0.1] * len(y)
fig = plt.figure()
fig.set_facecolor('blue')
plt.title("Number of Deaths in each Region")
plt.pie(y, labels=x, startangle=90, explode=myexplode, shadow=True)
plt.show()
x = list(Deaths_perRegion_7.keys())
y = list(Deaths_perRegion_7.values())

# Bar chart: deaths reported in the last 7 days per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(x, y, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Deaths")
plt.title("Number of Deaths in each Region in last 7 days")
plt.show()

# Pie chart of the same totals.  It gets an explicit figure of its own so
# that the background colour is applied to the figure that actually holds
# the pie; setting it on the bar figure after plt.show() has no visible
# effect.
# One explode offset per slice instead of a hard-coded list of six.
myexplode = [0.1] * len(y)
fig = plt.figure()
fig.set_facecolor('blue')
plt.title("Number of Deaths in each Region in last 7 days")
plt.pie(y, labels=x, startangle=90, explode=myexplode, shadow=True)
plt.show()
x = list(Deaths_perRegion_24.keys())
y = list(Deaths_perRegion_24.values())

# Bar chart: deaths reported in the last 24 hours per WHO region.
fig = plt.figure(figsize=(20, 5))
plt.bar(x, y, color='blue', width=0.2)
plt.xlabel("WHO Region")
plt.ylabel("Deaths")
plt.title("Number of Deaths in each Region in last 24 hours")
plt.show()

# Pie chart of the same totals.  It gets an explicit figure of its own so
# that the background colour is applied to the figure that actually holds
# the pie; setting it on the bar figure after plt.show() has no visible
# effect.
# One explode offset per slice instead of a hard-coded list of six.
myexplode = [0.1] * len(y)
fig = plt.figure()
fig.set_facecolor('blue')
plt.title("Number of Deaths in each Region in last 24 hours")
plt.pie(y, labels=x, startangle=90, explode=myexplode, shadow=True)
plt.show()
# Per-region fatality rate (percent), computed as deaths reported in the
# last 24 hours divided by cases reported in the last 24 hours.
# NOTE(review): same-day deaths over same-day cases is a rough proxy, since
# deaths usually trail the cases they belong to -- confirm this is the
# intended definition.
x = []   # evenly spaced x positions: 2, 4, 6, ...
y1 = []  # cases in the last 24 hours, per region
y2 = []  # deaths in the last 24 hours, per region
y3 = []  # fatality rate in percent, per region
for slot, region in enumerate(listconti, start=1):
    cases_24h = Case_per_region_24[region]
    deaths_24h = Deaths_perRegion_24[region]
    x.append(2 * slot)
    y1.append(cases_24h)
    y2.append(deaths_24h)
    # A region with no cases in the last 24 hours gets NaN rather than a
    # division by zero; matplotlib leaves a gap for NaN points.
    y3.append(100 * deaths_24h / cases_24h if cases_24h else float("nan"))
cnt = 2 * len(listconti)

fig = plt.figure(figsize=(20, 5))
plt.plot(x, y3)
plt.xlabel("WHO Region")
plt.ylabel("Fatality Rate in percent")
plt.title("Deaths and Cases per Region")
# Tick labels come from the same list the loop iterated, so they cannot go
# out of step with the plotted values.
plt.xticks(x, listconti)
plt.show()
# Sort ascending by cumulative cases (covid_df stays sorted for later cells).
covid_df = covid_df.sort_values("Cases - cumulative total")

# The five rows with the most cumulative cases, in ascending order.
# The "Global" aggregate row is excluded by name rather than by assuming it
# is the last row after sorting, so a table without that row does not
# silently lose its top country.
countries_only = covid_df[covid_df["Name"] != "Global"]
top_five = countries_only.tail(5)
top_five
| Name | WHO Region | Cases - cumulative total | Cases - newly reported in last 7 days | Cases - newly reported in last 24 hours | Deaths - cumulative total | Deaths - newly reported in last 7 days | Deaths - newly reported in last 24 hours | |
|---|---|---|---|---|---|---|---|---|
| 5 | Germany | Europe | 22265788 | 1161279 | 201729 | 131036 | 1645 | 328 |
| 4 | France | Europe | 25623682 | 926709 | 161950 | 139706 | 755 | 128 |
| 3 | Brazil | Americas | 30040129 | 157732 | 27331 | 660528 | 1287 | 216 |
| 2 | India | South-East Asia | 43031958 | 7518 | 1033 | 521530 | 401 | 43 |
| 1 | United States of America | Americas | 79501007 | 207355 | 35049 | 975540 | 3657 | 610 |
# Names of the five selected rows, in the same (ascending-by-cases) order.
top_five_country = top_five["Name"].tolist()
top_five_country
['Germany', 'France', 'Brazil', 'India', 'United States of America']
Although Europe has very high number of cases in Recent times but the fatality rate is still pretty less as compared to other WHO Regions. The Fatality rate is highest in American Continent. Thus the distribution of vaccine can be proposed on the basis of this rates.
# Load the WHO daily time-series table (per-date, per-country new and
# cumulative cases/deaths) and display it as the cell output.
daily_csv = "./data/WHO-COVID-19-global-data.csv"
date_df = pd.read_csv(daily_csv)
date_df
| Date_reported | Country_code | Country | WHO_region | New_cases | Cumulative_cases | New_deaths | Cumulative_deaths | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-03 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
| 1 | 2020-01-04 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
| 2 | 2020-01-05 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
| 3 | 2020-01-06 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
| 4 | 2020-01-07 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 195757 | 2022-04-03 | ZW | Zimbabwe | AFRO | 195 | 246481 | 2 | 5446 |
| 195758 | 2022-04-04 | ZW | Zimbabwe | AFRO | 44 | 246525 | 0 | 5446 |
| 195759 | 2022-04-05 | ZW | Zimbabwe | AFRO | 87 | 246612 | 5 | 5451 |
| 195760 | 2022-04-06 | ZW | Zimbabwe | AFRO | 132 | 246744 | 0 | 5451 |
| 195761 | 2022-04-07 | ZW | Zimbabwe | AFRO | 59 | 246803 | 2 | 5453 |
195762 rows × 8 columns
# Map each country name to its list of daily new-case counts, in file order.
# Iterating the two columns together replaces per-row scalar indexing
# (date_df[col][i]), which is very slow on a table of this size.
Country = {}
for country_name, new_cases in zip(date_df["Country"], date_df["New_cases"]):
    Country.setdefault(country_name, []).append(new_cases)

# Distinct reporting dates in first-appearance order.  dict.fromkeys
# de-duplicates in one pass instead of a linear `not in` scan of a list for
# every row.
dates = list(dict.fromkeys(date_df["Date_reported"]))
# One line chart of daily new cases for each of the top-five countries.
tick_start = 20   # first labelled day (1-based position in the series)
tick_step = 120   # days between labelled ticks

for country_name in top_five_country:
    daily_cases = list(Country[country_name])
    # Day numbers 1..n on the x axis.
    x = list(range(1, len(daily_cases) + 1))

    fig = plt.figure(figsize=(20, 5))
    plt.plot(x, daily_cases, color="black")

    # Tick labels are read from `dates` instead of being typed in by hand
    # (the hand-written list carried a wrong year, "January 18,2020", for the
    # tick at day 380).  Assumes every country's series starts at dates[0]
    # with one row per date -- TODO confirm against the CSV.
    last_labelled = min(len(daily_cases), len(dates))
    xd = list(range(tick_start, last_labelled + 1, tick_step))
    DATE = [pd.to_datetime(dates[day - 1]).strftime("%B %d, %Y") for day in xd]
    plt.xticks(xd, DATE)

    plt.title("Covid Cases in " + str(country_name))
    plt.xlabel("Timeline")
    plt.ylabel("New Cases per Day")
    plt.show()
# Map each country name to its list of daily new-death counts, in file order.
# Iterating the two columns together replaces per-row scalar indexing
# (date_df[col][i]), which is very slow on a table of this size.
Country_d = {}
for country_name, new_deaths in zip(date_df["Country"], date_df["New_deaths"]):
    Country_d.setdefault(country_name, []).append(new_deaths)

# Distinct reporting dates in first-appearance order.  dict.fromkeys
# de-duplicates in one pass instead of a linear `not in` scan of a list for
# every row.
dates = list(dict.fromkeys(date_df["Date_reported"]))
len(dates)
# One line chart of daily new deaths for each of the top-five countries.
tick_start = 20   # first labelled day (1-based position in the series)
tick_step = 120   # days between labelled ticks

for country_name in top_five_country:
    daily_deaths = list(Country_d[country_name])
    # Day numbers 1..n on the x axis.
    x = list(range(1, len(daily_deaths) + 1))

    fig = plt.figure(figsize=(20, 5))
    plt.plot(x, daily_deaths, color="black")

    # Tick labels are read from `dates` instead of being typed in by hand
    # (the hand-written list carried a wrong year, "January 18,2020", for the
    # tick at day 380).  Assumes every country's series starts at dates[0]
    # with one row per date -- TODO confirm against the CSV.
    last_labelled = min(len(daily_deaths), len(dates))
    xd = list(range(tick_start, last_labelled + 1, tick_step))
    DATE = [pd.to_datetime(dates[day - 1]).strftime("%B %d, %Y") for day in xd]
    plt.xticks(xd, DATE)

    plt.title("New Deaths in " + str(country_name))
    plt.xlabel("Timeline")
    plt.ylabel("New Deaths")
    plt.show()
As we can see from the above graphs that the peak in covid-19 cases and deaths are distinct for all the five countries. In India we can observe three waves (September 2020, May 2021, January 2022) in which the most devastating wave was the second wave.